The goals / steps of this project are the following:
# import libraries to use
import numpy as np
import cv2
import glob
import matplotlib.pyplot as plt
%matplotlib inline
# Let's check how one of the calibration images looks
image = cv2.imread('./camera_cal/calibration2.jpg')
# in some images not all intersection points are visible
# Intersection point capturing
# chessboard pattern size: 9 inner corners per row, 6 per column
nx = 9
ny = 6
# findChessboardCorners expects a single-channel image
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
ret, corners = cv2.findChessboardCorners(gray, (nx, ny), None)
# Plot corners to verify (drawn in place on the image)
cv2.drawChessboardCorners(image, (nx, ny), corners, ret)
# NOTE(review): image is still BGR here, so plt.imshow swaps red/blue;
# harmless for a visual corner check
plt.imshow(image)
# Calculate camera matrix and distortion coefficients
# points array is always the same because we will discard images with less than nx * ny intersection points
# numpy array to hold real world coord. of points. Each coord is 3d with z=0
objp = np.zeros((nx * ny, 3), np.float32)
# reshape the data into a grid which is what opencv calibrateCamera needs
# done differently than in template for learning purpose
x_coord, y_coord = np.mgrid[0:nx,0:ny]
# column-major flatten so points are ordered row by row of the chessboard
x_coord = x_coord.flatten(order='F')
y_coord = y_coord.flatten(order='F')
objp[:,0] = x_coord
objp[:,1] = y_coord
# Since opencv calibrateCamera takes an array of arrays of points (one array per image) we need to set a list
# Each element on that list is the array of points of an image
objpoints = [] # 3d points in real world space
# A structure repeating this pattern but with points in the image plane is also needed for calibrateCamera
imgpoints = [] # 2d points in image plane.
# List of calibration images available
images = glob.glob('./camera_cal/calibration*.jpg')
# Loop images and add points in those where findChessboardCorners is successful
for image in images:
    img = cv2.imread(image)
    # Repeat steps done for a single image
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    ret, corners = cv2.findChessboardCorners(gray, (nx, ny), None)
    # Check that corners have been found. From a previous check we know some calibration images will not work
    if ret == True:
        objpoints.append(objp)
        imgpoints.append(corners)
# Get calibration values for the camera
# gray.shape[::-1] converts (rows, cols) to the (width, height) OpenCV expects
ret, mtx, dist_coef, rvecs, tvecs = cv2.calibrateCamera(objpoints, imgpoints, gray.shape[::-1], None, None)
print('Camera Matrix:')
print(mtx)
print('Distortion Coefficients:')
print(dist_coef)
# Saving camera calibration to files for later use
np.save('camera_matrix', mtx)
np.save('distortion_coeff', dist_coef)
# notebook cell expression: displays the camera matrix as cell output
mtx
Let's try correcting distortion first on a calibration image. Given that those images are chessboards, they should be easy to check for correctness.
# Lets check those results on a calibration image first
image = cv2.imread('./camera_cal/calibration1.jpg')
# undistort using the calibration matrix and coefficients obtained above
dst = cv2.undistort(image, mtx, dist_coef, None, mtx)
# side-by-side comparison: original (left) vs undistorted (right)
fig, ax = plt.subplots(1,2)
ax[0].imshow(image)
ax[1].imshow(dst)
fig.set_size_inches(11,4)
ax[0].axis("off")
ax[1].axis("off")
# Saving to output folder
cv2.imwrite('./output_images/calibration1_undistorted.jpg', dst)
Now let's use the same values on one of the road images.
# Now check those results on one of the road test images
image = cv2.imread('./test_images/test2.jpg')
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) # convert for easier plotting
dst = cv2.undistort(image, mtx, dist_coef, None, mtx)
# side-by-side comparison: original (left) vs undistorted (right)
fig, ax = plt.subplots(1,2)
ax[0].imshow(image)
ax[1].imshow(dst)
fig.set_size_inches(11,4)
# Saving to output folder
dst = cv2.cvtColor(dst, cv2.COLOR_RGB2BGR) # converting back for output
cv2.imwrite('./output_images/test2_undistorted.jpg', dst)
We'll start by checking how each channel looks in both the RGB and HLS color spaces.
# Let's read an image from the test image folder
image = cv2.imread('./test_images/test1.jpg')
# convert from BGR to RGB to make plotting consistent
rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
# convert from BGR to HLS (hue, lightness, saturation)
hls_image = cv2.cvtColor(image, cv2.COLOR_BGR2HLS)
# Let's plot each channel to see how they look
# grid: one row per channel index, RGB channels on the left column, HLS on the right
fig, ax = plt.subplots(3,2)
i = 0
rgb_channels = 'RGB'
hls_channels = 'HLS'
for a in ax:
    a[0].imshow(rgb_image[:,:,i], cmap='gray')
    a[0].set_title('Channel ' + rgb_channels[i])
    a[1].imshow(hls_image[:,:,i], cmap='gray')
    a[1].set_title('Channel ' + hls_channels[i])
    i += 1
fig.set_size_inches(10,8)
In the notes it was the case that the S channel worked well for detecting the lines. That seems to be the case by looking at these images. Let's try to combine it with some of the color thresholding done in problem 1.
# Define a function that takes an image and returns the thresholded. Not in binary form.
def thresh_image(img, thresh=(0, 255)):
    """Zero out pixels outside (thresh[0], thresh[1]]; keep the rest unchanged.

    Returns a new array of the same shape and dtype; img is not modified.
    """
    lo, hi = thresh
    result = img.copy()
    # a pixel survives only when lo < value <= hi
    result[(img <= lo) | (img > hi)] = 0
    return result
# Define a function that given an image returns a binary mask
def to_binary(img):
    """Return a mask of img's shape and dtype: 1 where img > 0, else 0."""
    return (img > 0).astype(img.dtype)
# Thresholding channel S (index 2) from the HLS image
thresh_s = (100, 255)
filt_s = thresh_image(hls_image[:,:,2], thresh=thresh_s)
# Plot the result
f, (ax1, ax2) = plt.subplots(1, 2, figsize=(24, 9))
f.tight_layout()
ax1.imshow(hls_image)
ax1.set_title('Original Image', fontsize=30)
ax2.imshow(filt_s, cmap='gray')
ax2.set_title('Filtered S', fontsize=30)
cv2.imwrite('./output_images/test1_color_filter_channel_S.jpg', filt_s)
# Thresholding channel R (index 0) from the RGB image
thresh_r = (190,255)
filt_r = thresh_image(rgb_image[:,:,0], thresh=thresh_r)
# Plot the result
f, (ax1, ax2) = plt.subplots(1, 2, figsize=(24, 9))
f.tight_layout()
ax1.imshow(rgb_image)
ax1.set_title('Original Image', fontsize=30)
ax2.imshow(filt_r, cmap='gray')
ax2.set_title('Filtered R', fontsize=30)
cv2.imwrite('./output_images/test1_color_filter_channel_R.jpg', filt_r)
We'll try to combine both filters in a single image. We'll use an average of the influence of each one. Binary thresholding is done on the combination. This is done to convert to binary as late as possible and to not lose information.
# Let's add values from both images filtered
# cast to int first so the sum cannot overflow uint8
add_im = filt_r.astype(int)
add_im += filt_s
# Normalize results to 255
# NOTE(review): assumes at least one pixel passed a filter; add_im.max() == 0
# would divide by zero — confirm inputs are never fully filtered out
add_im = add_im / add_im.max() * 255
# Create a thresholded binary image
thresh_color = (110, 255)
binary = to_binary(thresh_image(add_im, thresh=thresh_color))
# Plot the result
f, (ax1, ax2) = plt.subplots(1, 2, figsize=(24, 9))
f.tight_layout()
ax1.imshow(rgb_image)
ax1.set_title('Original Image', fontsize=30)
ax2.imshow(binary, cmap='gray')
ax2.set_title('Thresholded Binary', fontsize=30)
# scale the 0/1 mask to 0/255 so the written image is visible
cv2.imwrite('./output_images/test1_color_filter_mixed_thresholding_binary.jpg', binary * 255)
The result is already quite decent. However, let's try to improve it with information from gradients. Again, we will combine the information in numeric form, not in binary form.
We first define some functions to use
# This function comes from the quiz. It has been modified to take an already monochrome image and return
# a filtered version. The threshold filter is based on gradient direction
def dir_threshold(img, sobel_kernel=3, thresh=(0, np.pi/2)):
    """Keep pixels whose absolute gradient direction is in (thresh[0], thresh[1]].

    img is a single-channel image. Returns an array of gradient directions in
    radians (folded into [0, pi/2]); pixels outside the band are set to 0.
    """
    # x and y derivatives with the requested Sobel aperture
    sobel_x = cv2.Sobel(img, cv2.CV_64F, 1, 0, ksize=sobel_kernel)
    sobel_y = cv2.Sobel(img, cv2.CV_64F, 0, 1, ksize=sobel_kernel)
    # direction of the gradient, folded into the first quadrant via abs()
    direction = np.arctan2(np.absolute(sobel_y), np.absolute(sobel_x))
    # zero everything outside the requested direction band
    out = direction.copy()
    out[(direction <= thresh[0]) | (direction > thresh[1])] = 0
    return out
# This is a similar case but using the magnitude of the gradient instead of direction
def mag_thresh(img, sobel_kernel=3, thresh=(0, 255)):
    """Threshold a single-channel image on gradient magnitude.

    The gradient magnitude is scaled to 0-255; pixels whose scaled magnitude is
    outside (thresh[0], thresh[1]] are set to 0, the rest keep their scaled value.
    Returns a uint8 array the same shape as img.
    """
    # BUG FIX: sobel_kernel was accepted but never passed to cv2.Sobel, so every
    # caller silently got the default 3x3 aperture. cv2.Sobel requires an odd
    # ksize, so coerce even values up by one (the notebook calls with 12).
    ksize = sobel_kernel if sobel_kernel % 2 == 1 else sobel_kernel + 1
    grad_x = cv2.Sobel(img, cv2.CV_64F, 1, 0, ksize=ksize)
    grad_y = cv2.Sobel(img, cv2.CV_64F, 0, 1, ksize=ksize)
    # Calculate the magnitude
    abs_grad = np.sqrt(grad_x**2 + grad_y**2)
    # Scale to 8-bit (0 - 255) and convert to type = np.uint8
    # NOTE(review): assumes the image is not constant (max() > 0) — confirm
    scaled_grad = np.uint8(255*abs_grad/np.max(abs_grad))
    # Zero pixels that do not fit within the thresholds
    filtered = scaled_grad.copy()
    filtered[scaled_grad <= thresh[0]] = 0
    filtered[scaled_grad > thresh[1]] = 0
    return filtered
# Let's look at how direction thresholding looks applied over the original image
gray = cv2.cvtColor(rgb_image, cv2.COLOR_RGB2GRAY)
# keep near-vertical gradient directions (radians)
thresh_dir = (0.8, 1.2)
grad_dir = dir_threshold(gray, sobel_kernel=15, thresh=thresh_dir)
f, (ax1, ax2) = plt.subplots(1, 2, figsize=(24, 9))
f.tight_layout()
ax1.imshow(rgb_image)
ax1.set_title('Original Image', fontsize=30)
ax2.imshow(grad_dir, cmap='gray')
ax2.set_title('Thresholded Direction', fontsize=30)
# rescale radian values to 0-255 so the saved image is visible
cv2.imwrite('./output_images/test1_gradient_direction_filtered.jpg', grad_dir/grad_dir.max() * 255)
# Now the same for magnitude thresholding
gray = cv2.cvtColor(rgb_image, cv2.COLOR_RGB2GRAY)
thresh_mag = (30,150)
grad_mag = mag_thresh(gray, sobel_kernel=9, thresh=(30,150))
f, (ax1, ax2) = plt.subplots(1, 2, figsize=(24, 9))
f.tight_layout()
ax1.imshow(rgb_image)
ax1.set_title('Original Image', fontsize=30)
ax2.imshow(grad_mag, cmap='gray')
ax2.set_title('Thresholded Magnitude', fontsize=30)
cv2.imwrite('./output_images/test1_gradient_magnitud_filtered.jpg', grad_mag/grad_mag.max() * 255)
# Let's combine gradient and color values from both filtered images
# for the magnitude of the different images to make sense they have to be normalized
# we can use a linear combination to make each aspect have a different influence
# It seems that using the color detection as a bitmask works better, it creates sharper features:
# weights: color mask 1.8, gradient direction 1.0, gradient magnitude 2.0
final_im = 1.8 * to_binary(thresh_image(add_im, thresh=thresh_color)) * 255
final_im += 1 * (grad_dir / grad_dir.max() * 255)
final_im += 2 * (grad_mag / grad_mag.max() * 255)
# Normalize results to 255
final_im = final_im / final_im.max() * 255
# Create a thresholded binary image of the combined image
thresh_final = (110, 255)
binary = to_binary(thresh_image(final_im, thresh=thresh_final))
f, (ax1, ax2) = plt.subplots(1, 2, figsize=(24, 9))
f.tight_layout()
ax1.imshow(rgb_image)
ax1.set_title('Original Image', fontsize=30)
ax2.imshow(binary, cmap='gray')
ax2.set_title('Thresholded Mixed Image', fontsize=30)
# scale the 0/1 mask to 0/255 so the written image is visible
cv2.imwrite('./output_images/test1_gradient_thresholded_binary.jpg', binary * 255)
Let's put all the operations in a function. It should take a color image and return a binary map.
def thresholding(img):
    """Full color + gradient thresholding pipeline.

    Takes a BGR image (as returned by cv2.imread) and returns a binary (0/1)
    lane mask of the same height/width. Threshold parameters come from global
    variables set in the experiments above (thresh_s, thresh_r, thresh_color,
    thresh_mag, thresh_dir, thresh_final); a good improvement would be to pass
    a dict or separate parameters instead.
    """
    # we assume the image is read in BGR form
    rgb_image = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    # convert from BGR to HLS
    hls_image = cv2.cvtColor(img, cv2.COLOR_BGR2HLS)
    # Thresholding channel S (index 2) from the HLS image
    filt_s = thresh_image(hls_image[:,:,2], thresh=thresh_s)
    # Thresholding channel R (index 0) from the RGB image
    filt_r = thresh_image(rgb_image[:,:,0], thresh=thresh_r)
    # Add values from both filtered images (int cast avoids uint8 overflow)
    add_im = filt_r.astype(int)
    add_im += filt_s
    # Normalize results to 255; guard against an all-zero mask (division by zero)
    add_im = add_im / max(add_im.max(), 1) * 255
    # Calculate gradient thresholded images
    gray = cv2.cvtColor(rgb_image, cv2.COLOR_RGB2GRAY)
    # BUG FIX: the kernel was 12, but cv2.Sobel requires an odd aperture; use 13.
    # (The old value only "worked" because mag_thresh ignored its kernel argument.)
    grad_mag = mag_thresh(gray, sobel_kernel=13, thresh=thresh_mag)
    grad_dir = dir_threshold(gray, sobel_kernel=15, thresh=thresh_dir)
    # Combine all images with the weights found in the experiments above,
    # guarding each normalization against a degenerate all-zero input
    final_im = 1.8 * to_binary(thresh_image(add_im, thresh=thresh_color)) * 255
    final_im += 1 * (grad_dir / max(grad_dir.max(), 1e-9) * 255)
    final_im += 2 * (grad_mag / max(grad_mag.max(), 1) * 255)
    # Normalize results to 255
    final_im = final_im / max(final_im.max(), 1e-9) * 255
    # Create a thresholded binary image of the combined image.
    # we also apply a gaussian blur to smooth out noise
    blur_gray = cv2.GaussianBlur(final_im, (3, 3), 0)
    binary = to_binary(thresh_image(blur_gray, thresh=thresh_final))
    return binary
# Let's try it on different images
images = glob.glob('./test_images/*.jpg')
# list to hold converted images
treated_img = []
for i in range(len(images)):
    file_name = images[i]
    image = cv2.imread(file_name)
    t = thresholding(image)
    treated_img.append(t)
    # save each with a name related to the original, scaled to 0/255
    cv2.imwrite('./output_images/' + file_name.split('/')[-1].split('.')[0] + '_thresholded.jpg', t * 255)
# grid layout: 2 columns, enough rows for all images
col = 2
row = int(len(images) / col) + len(images) % col
fig, ax = plt.subplots(row, col)
for i in range(len(images)):
    file_name = images[i]
    ax[int(i/col), i % col].imshow(treated_img[i], cmap='gray')
    ax[int(i/col), i % col].set_title('Thresholded Image: ' + file_name.split('/')[-1])
    ax[int(i/col), i % col].axis('off')
fig.set_size_inches(10,16)
# BUG FIX: this previously called f.tight_layout(), which belongs to a figure
# created in an earlier cell; tighten the figure created here instead
fig.tight_layout()
We are going to warp an undistorted image to get a better view of the lanes. From this view, we can locate the lane more easily and we will calculate its curvature.
# Let's undistort the reference images with the straight road
image = cv2.imread('./test_images/straight_lines1.jpg')
dst = cv2.undistort(image, mtx, dist_coef, None, mtx)
# Saving to output folder
cv2.imwrite('./output_images/straight_lines1_undistorted.jpg', dst)
# same for the second reference image
image = cv2.imread('./test_images/straight_lines2.jpg')
dst = cv2.undistort(image, mtx, dist_coef, None, mtx)
# Saving to output folder
cv2.imwrite('./output_images/straight_lines2_undistorted.jpg', dst)
We take known positions on the image to warp it. Given that the picture is taken on a fairly straight stretch of road, we can assume the lines painted on the road to be straight. This gives four easily identifiable points.
# With pixel positions from those images we have:
# source quadrilateral: a trapezoid along the lane lines, points in (x, y) order
src = np.array([[585, 460], [697, 460], [1044, 690], [259, 690]])
# destination rectangle in the bird's-eye view
dest =np.array([[320, 0], [960, 0], [960, 720], [320, 720]])
# Calculate perspective transformation from perspective image to "flat" image
M = cv2.getPerspectiveTransform(src.astype(np.float32), dest.astype(np.float32))
# inverse transformation matrix from "flat" image back to perspective
Minv = cv2.getPerspectiveTransform(dest.astype(np.float32), src.astype(np.float32))
# save configuration
np.save('direct_persp_trans',M)
np.save('inverse_persp_trans', Minv)
# Let's paint those values on top of the image
image = cv2.imread('./output_images/straight_lines1_undistorted.jpg')
rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
# transform image by warping (size argument is (width, height))
warped = cv2.warpPerspective(rgb_image, M, (rgb_image.shape[1], rgb_image.shape[0]), flags=cv2.INTER_LINEAR)
# draw polyline on top of original image for checking
cv2.polylines(rgb_image, [src], True, (255,0,0), 4)
# draw polyline on top of warped image for checking
cv2.polylines(warped, [dest], True, (255,0,0), 4)
f, (ax1, ax2) = plt.subplots(1, 2, figsize=(24, 9))
f.tight_layout()
ax1.imshow(rgb_image)
ax1.set_title('Undistorted Image', fontsize=30)
ax2.imshow(warped)
ax2.set_title('Warped Image', fontsize=30)
# NOTE(review): warped is RGB at this point, so imwrite stores it with
# red/blue swapped; convert to BGR first if exact colors matter
cv2.imwrite('./output_images/straight_lines1_undistorted_warped.jpg', warped)
# Let's try the process with one of the thresholded images
image = cv2.imread('./test_images/test4.jpg')
# convert from BGR to RGB
rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
# The first step is to undistort the image
un_dis = cv2.undistort(image, mtx, dist_coef, None, mtx)
# Next we do color and gradient thresholding
thresholded = thresholding(un_dis)
# Now warp according to perspective
warped = cv2.warpPerspective(thresholded, M, (thresholded.shape[1], thresholded.shape[0]), flags=cv2.INTER_LINEAR)
f, (ax1, ax2) = plt.subplots(1, 2, figsize=(24, 9))
f.tight_layout()
ax1.imshow(rgb_image)
ax1.set_title('Original Image', fontsize=30)
ax2.imshow(warped, cmap='gray')
ax2.set_title('Warped-Thresholded Image', fontsize=30)
# binary mask scaled to 0/255 for saving
cv2.imwrite('./output_images/test4_undistorted_thresholded_warped.jpg', warped * 255)
The approach I took is a little different from what was shown in class. I select a band of the image based on the maximum points in the histogram. From the pixels active in that vertical slice I fit a straight line as a first approximation. Then I mask a band based on that approximation and do a second, better approximation based on a second-order polynomial.
There are many ways of creating a mask based on a straight line. Since we are already using imaging libraries from opencv, I will use it to create masks.
# This function takes the dimensions of an image, a line defined by two points and a band width and returns a binary mask
def line_mask(h=720, w=1280, point1=(0,0), point2=(1280, 720), band = 150):
    """Return an h x w mask that is 1 on a band of width `band` pixels around
    the segment point1-point2 (points in (x, y) order) and 0 elsewhere.
    """
    # BUG FIX: the mask is used as the `mask` argument of cv2.bitwise_and,
    # which expects an 8-bit unsigned single-channel array; np.int8 was signed.
    # The default point2 was also (720, 1280), i.e. (x, y) swapped relative to
    # the default w=1280, h=720 (all callers pass explicit points, so this is
    # a latent default-value fix only).
    mask = np.zeros((h, w), np.uint8)
    cv2.line(mask, point1, point2, (1,0,0), band)
    return mask
# Let's see where the lane starts at the bottom of the picture
# we'll use the histogram technique over the bottom two thirds of the image
histogram = np.sum(warped[int(warped.shape[0]/3):,:], axis=0)
# finding the right and left peak of the histogram gives a good approximation of where the lane is
# BUG FIX: np.int was deprecated and removed in NumPy 1.20+; use the builtin int
midpoint = int(histogram.shape[0]/2)
leftx_base = np.argmax(histogram[:midpoint])
rightx_base = np.argmax(histogram[midpoint:]) + midpoint
print('Taking left laneline start at: {:d}'.format(leftx_base))
print('Taking right laneline start at: {:d}'.format(rightx_base))
# First step in reducing uncertainty, fit a straight line on a band each side
# NOTE(review): half_bandwidth is never passed to line_mask below, so the
# default band of 150 is what is actually used — confirm which was intended
half_bandwidth = 200
# Create a mask for our region of interest: a vertical band around each base
left_mask= line_mask(h=warped.shape[0], w=warped.shape[1],
                     point1=(leftx_base, warped.shape[0]),
                     point2=(leftx_base, 0))
right_mask = line_mask(h=warped.shape[0], w=warped.shape[1],
                       point1=(rightx_base, warped.shape[0]),
                       point2=(rightx_base, 0))
# keep only the pixels that fall inside each band
left_lane = cv2.bitwise_and(warped, warped, mask=left_mask)
right_lane = cv2.bitwise_and(warped, warped, mask=right_mask)
# fit a straight line through our points
# Careful with what is x and y depending if we use image or matrix form
y_fit, x_fit = np.nonzero(left_lane) # y_fit is first component, rows in matrix
l_lane_line = np.polyfit(y_fit, x_fit, 1) # x and y are inverted from a "normal" polynomial
y_fit, x_fit = np.nonzero(right_lane) # y_fit is first component, rows in matrix
r_lane_line = np.polyfit(y_fit, x_fit, 1) # x and y are inverted from a "normal" polynomial
# Plot line on top of image for verification. This whole part is only needed for debugging
debug_left = cv2.cvtColor(left_lane.astype(np.float32), cv2.COLOR_GRAY2RGB)
debug_right = cv2.cvtColor(right_lane.astype(np.float32), cv2.COLOR_GRAY2RGB)
# evaluate the fitted lines at the bottom (y=720) and top (y=0) of the image
left_base = (int(l_lane_line[0] * 720 + l_lane_line[1]) , 720)
left_top = (int(l_lane_line[1]) , 0)
right_base = (int(r_lane_line[0] * 720 + r_lane_line[1]) , 720)
right_top = (int(r_lane_line[1]) , 0)
cv2.line(debug_left, left_base, left_top, (1,0,0), 10)
cv2.line(debug_right, right_base, right_top, (1,0,0), 10)
f, (ax1, ax2) = plt.subplots(1, 2, figsize=(24, 9))
f.tight_layout()
ax1.imshow(debug_left)
ax1.set_title('Left Lane Line', fontsize=30)
ax2.imshow(debug_right)
ax2.set_title('Right Lane Line', fontsize=30)
# Repeat the masking to improve the result but now with the line obtained before
# We'll also fit a degree two polynomial
# Create a mask for our region of interest around each fitted straight line
left_mask= line_mask(h=warped.shape[0], w=warped.shape[1], point1=left_base, point2=left_top)
right_mask = line_mask(h=warped.shape[0], w=warped.shape[1], point1=(right_base), point2=right_top)
left_lane = cv2.bitwise_and(warped, warped, mask=left_mask)
right_lane = cv2.bitwise_and(warped, warped, mask=right_mask)
# fit a degree two polynomial through our points
# Careful with what is x and y depending if we use image or matrix form
y_fit, x_fit = np.nonzero(left_lane) # y_fit is first component, rows in matrix
l_polynomial = np.polyfit(y_fit, x_fit, 2) # x and y are inverted from a "normal" polynomial
y_fit, x_fit = np.nonzero(right_lane) # y_fit is first component, rows in matrix
r_polynomial = np.polyfit(y_fit, x_fit, 2) # x and y are inverted from a "normal" polynomial
# Plot polynomial on top of image for verification. This whole part is only needed for debugging
# one sample per image row
ploty = np.linspace(0, 719, num=720)
left_fit = l_polynomial[0] * ploty ** 2 + l_polynomial[1] * ploty + l_polynomial[2]
right_fit = r_polynomial[0] * ploty ** 2 + r_polynomial[1] * ploty + r_polynomial[2]
debug_left = cv2.cvtColor(left_lane.astype(np.float32), cv2.COLOR_GRAY2RGB)
debug_right = cv2.cvtColor(right_lane.astype(np.float32), cv2.COLOR_GRAY2RGB)
# stack (x, y) pairs in the format cv2.polylines expects
pts_left = np.array([np.transpose(np.vstack([left_fit, ploty]))])
pts_right = np.array([np.transpose(np.vstack([right_fit, ploty]))])
cv2.polylines(debug_left, np.int_([pts_left]), False, (0,1, 0), 10)
cv2.polylines(debug_right, np.int_([pts_right]), False, (0,1, 0), 10)
f, (ax1, ax2) = plt.subplots(1, 2, figsize=(24, 9))
f.tight_layout()
ax1.imshow(debug_left)
ax1.set_title('Left Lane Line', fontsize=30)
ax2.imshow(debug_right)
ax2.set_title('Right Lane Line', fontsize=30)
Now that we have polynomial fitted to both lane lines, we can estimate what the radius of curvature is for that part of the road. For that, we need to convert from pixel space back to world space. We get those measurements from what we used as source and destination points before, combined with estimates of lane measures
# Calculate radius of curvature for both left and right lines
# lane width in warped-image pixels (from the dest points: 960 - 320)
lane_pixel_width = 960 - 320
# rough real-world scale: ~30 m of road mapped onto the 720 vertical pixels
ym_per_pixel = 30 / 720
# a US highway lane is ~3.7 m wide
xm_per_pixel = 3.7 / lane_pixel_width
# Scale pixels to meters and fit the polynomials again in world space
# evaluate curvature at the bottom of the image (closest to the car)
y_value = debug_left.shape[0] * ym_per_pixel
y_fit, x_fit = np.nonzero(left_lane)
l_polynomial_m = np.polyfit(y_fit * ym_per_pixel, x_fit * xm_per_pixel, 2)
y_fit, x_fit = np.nonzero(right_lane)
r_polynomial_m = np.polyfit(y_fit * ym_per_pixel, x_fit * xm_per_pixel, 2)
# assignment for clarity
A, B, C = l_polynomial_m
# radius of curvature of x = A*y^2 + B*y + C: (1 + (2Ay + B)^2)^(3/2) / |2A|
R_left = (1 + (2 * A * y_value + B) ** 2) ** (3 / 2) / np.absolute(2 * A)
A, B, C = r_polynomial_m
R_right = (1 + (2 * A * y_value + B) ** 2) ** (3 / 2) / np.absolute(2 * A)
print('Radius of curvature of left line: {:.1f}'.format(R_left))
print('Radius of curvature of right line: {:.1f}'.format(R_right))
# maybe an average is a better estimate
radius = (R_right + R_left) / 2
print('Estimated radius of curvature: {:.1f}'.format(radius))
To calculate what the position of the car is in the lane, we need to assume a certain position for the camera. As a starting point we would assume that the camera is mounted at the center line of the car.
We will use the values of the polynomial fitting done in world space at the bottom of the picture. The average of those values is the mid point of the lane. We compare that value with the mid point of the image
# evaluate both world-space polynomials at the bottom of the image
x_max = np.dot(r_polynomial_m, np.array([y_value**2, y_value, 1]))
x_min = np.dot(l_polynomial_m, np.array([y_value**2, y_value, 1]))
# lane center is halfway between the two line positions
mid_lane = (x_max + x_min) / 2
# image center in meters (camera assumed mounted on the car centerline)
mid_image = (1280 * xm_per_pixel) / 2
car_pos = mid_image - mid_lane
print('Car is {:.2f} meters from centerline of lane. Positive on the right side'.format(car_pos))
Let's put it all together and plot the result over a test image. We assume that camera calibration has already been done and it is kept in a matrix mtx.
Also, we have previously calculated a perspective transform that has been assigned to a global matrix M while its inverse is Minv
Let's define a function to put together the lane finding and fitting steps, and another that helps with plotting the lane that we've found.
# This function takes an undistorted, warped, and thresholded image and a band estimate as inputs.
# It returns a tuple with left line polynomial and right line polynomial
def find_lane(img, bandwidth=200):
    """Locate the two lane lines in a bird's-eye binary image.

    img: warped binary (0/1) single-channel image.
    bandwidth: width in pixels of the first-pass search band around each line.
    Returns (l_polynomial, r_polynomial): degree-two np.polyfit coefficients of
    x as a function of y for the left and right lane lines.
    """
    # Let's see where the lane starts at the bottom of the picture, using the
    # column histogram over the bottom two thirds of the image
    histogram = np.sum(img[int(img.shape[0]/3):,:], axis=0)
    # finding the right and left peak of the histogram gives a good approximation of where the lane is
    # BUG FIX: np.int was deprecated and removed in NumPy 1.20+; use the builtin int
    midpoint = int(histogram.shape[0]/2)
    leftx_base = np.argmax(histogram[:midpoint])
    rightx_base = np.argmax(histogram[midpoint:]) + midpoint
    # Create a vertical mask band around each base position
    left_mask = line_mask(h=img.shape[0], w=img.shape[1],
                          point1=(leftx_base, img.shape[0]),
                          point2=(leftx_base, 0),
                          band=bandwidth)
    right_mask = line_mask(h=img.shape[0], w=img.shape[1],
                           point1=(rightx_base, img.shape[0]),
                           point2=(rightx_base, 0),
                           band=bandwidth)
    left_lane = cv2.bitwise_and(img, img, mask=left_mask)
    right_lane = cv2.bitwise_and(img, img, mask=right_mask)
    # fit a straight line through our points
    # Careful with what is x and y depending if we use image or matrix form
    y_fit, x_fit = np.nonzero(left_lane) # y_fit is first component, rows in matrix
    l_lane_line = np.polyfit(y_fit, x_fit, 1) # x and y are inverted from a "normal" polynomial
    y_fit, x_fit = np.nonzero(right_lane) # y_fit is first component, rows in matrix
    r_lane_line = np.polyfit(y_fit, x_fit, 1) # x and y are inverted from a "normal" polynomial
    # Create reference points for the refined band at the image bottom and top.
    # BUG FIX: the image height was hard-coded as 720; use img.shape[0] so the
    # function works for any image size
    height = img.shape[0]
    left_base = (int(l_lane_line[0] * height + l_lane_line[1]), height)
    left_top = (int(l_lane_line[1]), 0)
    right_base = (int(r_lane_line[0] * height + r_lane_line[1]), height)
    right_top = (int(r_lane_line[1]), 0)
    # Repeat the masking to improve the result but now with the fitted line;
    # we'll also fit a degree two polynomial.
    # NOTE(review): this second pass uses line_mask's default band (150), not
    # `bandwidth` — kept as-is to preserve the original behavior
    left_mask = line_mask(h=img.shape[0], w=img.shape[1], point1=left_base, point2=left_top)
    right_mask = line_mask(h=img.shape[0], w=img.shape[1], point1=right_base, point2=right_top)
    left_lane = cv2.bitwise_and(img, img, mask=left_mask)
    right_lane = cv2.bitwise_and(img, img, mask=right_mask)
    # fit a degree two polynomial through our points
    y_fit, x_fit = np.nonzero(left_lane) # y_fit is first component, rows in matrix
    l_polynomial = np.polyfit(y_fit, x_fit, 2) # x and y are inverted from a "normal" polynomial
    y_fit, x_fit = np.nonzero(right_lane) # y_fit is first component, rows in matrix
    r_polynomial = np.polyfit(y_fit, x_fit, 2) # x and y are inverted from a "normal" polynomial
    # return polynomial coefficients
    return (l_polynomial, r_polynomial)
# This function takes an image assumed to be in RGB and plots a polynomial lane on top of it.
# The lane is expected as a tuple with a polynomial of degree two for left and another for right.
def plot_lane(img, lane):
    """Overlay the detected lane (left/right degree-two polynomials in warped
    space) on img, using the global inverse perspective matrix Minv."""
    height = img.shape[0]
    # one sample point per image row
    sample_y = np.linspace(0, height - 1, num=height)
    poly_l, poly_r = lane
    # evaluate x = A*y^2 + B*y + C for each side
    x_left = poly_l[0] * sample_y ** 2 + poly_l[1] * sample_y + poly_l[2]
    x_right = poly_r[0] * sample_y ** 2 + poly_r[1] * sample_y + poly_r[2]
    # blank canvas in warped space, same shape and dtype as the input image
    overlay = np.zeros_like(img)
    # Recast the x and y points into the format cv2.fillPoly expects:
    # left edge top-to-bottom, then right edge bottom-to-top
    boundary_l = np.array([np.transpose(np.vstack([x_left, sample_y]))])
    boundary_r = np.array([np.flipud(np.transpose(np.vstack([x_right, sample_y])))])
    polygon = np.hstack((boundary_l, boundary_r))
    cv2.fillPoly(overlay, np.int_([polygon]), (0,255, 0))
    # unwarp the overlay back to the camera perspective and blend it in
    unwarped = cv2.warpPerspective(overlay, Minv, (img.shape[1], img.shape[0]))
    return cv2.addWeighted(img, 1, unwarped, 0.3, 0)
# Let's try the full process with one of the test images
image = cv2.imread('./test_images/test3.jpg')
# convert from BGR to RGB
rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
# The first step is to undistort the image
un_dis = cv2.undistort(image, mtx, dist_coef, None, mtx)
# Next we do color and gradient thresholding
thresholded = thresholding(un_dis)
# Now warp according to perspective
warped = cv2.warpPerspective(thresholded, M, (thresholded.shape[1], thresholded.shape[0]), flags=cv2.INTER_LINEAR)
# Next find the polynomials that store the lane limits
lane = find_lane(warped, bandwidth=150)
# overlay the detected lane on the RGB image
im = plot_lane(rgb_image, lane)
# Let's plot the result
f, ax = plt.subplots(1, figsize=(24, 9))
ax.imshow(im)
# writing result images for all tests
images = glob.glob('./test_images/*.jpg')
for i in range(len(images)):
    file_name = images[i]
    image = cv2.imread(file_name)
    # The first step is to undistort the image
    un_dis = cv2.undistort(image, mtx, dist_coef, None, mtx)
    # Next we do color and gradient thresholding
    thresholded = thresholding(un_dis)
    # Now warp according to perspective
    warped = cv2.warpPerspective(thresholded, M, (thresholded.shape[1], thresholded.shape[0]), flags=cv2.INTER_LINEAR)
    # Next find the polynomials that store the lane limits
    lane = find_lane(warped, bandwidth=150)
    # NOTE(review): plot_lane documents an RGB input but image is BGR here;
    # since imwrite also expects BGR and the overlay is pure green (same in
    # both orders), the saved result still comes out correct
    im = plot_lane(image, lane)
    cv2.imwrite('./output_images/' + file_name.split('/')[-1].split('.')[0] + '_results.jpg',im)